# Format estimates and standard errors as LaTeX table-row fragments.
#
# `estimates` must have columns `estimate`, `std_error` and `pvalue`.
# Estimates are rounded to 3 digits and tagged with significance stars
# (*** p < 0.001, ** p < 0.01, * p < 0.05); standard errors are rounded to
# 3 digits and wrapped in parentheses.
#
# Returns a named character vector with one element per column whose name
# ends in "print" (here `estimate_print` and `standard.error_print`, plus any
# such column already present in `estimates`); each element joins that
# column's rows with " & ".
print_estimates <- function(estimates) {
  labelled <- mutate(
    estimates,
    # The star marker is chosen first and then appended to the rounded value;
    # rows with a missing or non-significant p-value get no marker.
    estimate_print = paste0(
      round(estimate, 3),
      case_when(
        pvalue < 0.001 ~ "$^{***}$",
        pvalue < 0.01 ~ "$^{**}$",
        pvalue < 0.05 ~ "$^{*}$",
        TRUE ~ ""
      )
    ),
    standard.error_print = paste0("(", round(std_error, 3), ")")
  )
  printable <- select(labelled, ends_with("print"))
  # Collapse every printable column into a single " & "-separated string.
  apply(printable, 2, paste, collapse = " & ")
}

# Make a dataset with lags and leads of each unit's variables.
# Variable names should be passed as strings.
# lead_yrs and lag_yrs should be integers (e.g., 2 means two lead/lag years).
# compare_var is the variable we make comparisons within (e.g., state_year).
# Attach lead and lag copies of each unit's variables, flag treated and control
# observations, and compute observation weights within each comparison group.
#
# Arguments (column names are passed as strings):
#   treat_var    column summed over the current year and all lead years; an
#                observation is "treated" when that sum equals lead_yrs + 1
#                (presumably a 0/1 indicator).
#   untreat_var  column summed the same way; an observation is a "control"
#                when that sum equals lead_yrs + 1.
#   time_var     time column, renamed to `year`; leads/lags are matched on
#                year -/+ k, so it must support arithmetic.
#   compare_var  column defining the groups within which treated and control
#                observations are counted, renamed to `compare_var`.
#   unit_var     unit identifier column(s); together with `year` the join key.
#   d_var        additional column(s) to carry along (lead/lag copies are
#                created for these as well).
#   lead_yrs     number of lead years (non-negative whole number; 0 adds none).
#   lag_yrs      number of lag years (non-negative whole number; 0 adds none).
#   lag_control  column(s) whose lag values must sum to lag_yrs for an
#                observation to qualify as treated or control.
#   df           input data frame; presumably one row per unit and year
#                (duplicate keys would multiply rows in the joins).
#
# Returns the selected columns plus `<var>_t<k>` (lead k) and `<var>_p<k>`
# (lag k) copies, and the columns treat_prior_sum, treat_sum, control_sum,
# treated, control, num_treated, num_control and obs_weight.
make_boot_data <- function(treat_var, untreat_var, time_var, compare_var,
                           unit_var, d_var, lead_yrs, lag_yrs, lag_control,
                           df) {
  # Exact names of the shifted copies of `vars` ("x_t1", "x_t2", ...).
  # Going through seq_len() means zero periods yield zero names; paste0() on
  # an empty index would return the bare prefix instead.
  shifted_names <- function(vars, tag, n_periods) {
    as.character(unlist(lapply(
      seq_len(n_periods),
      function(k) paste0(vars, tag, k)
    )))
  }

  # all_of() makes the string-to-column lookup explicit; passing bare external
  # character vectors to rename() is deprecated in tidyselect.
  data_subset <- df %>%
    select(all_of(c(treat_var, untreat_var, d_var, compare_var, unit_var,
                    time_var, lag_control))) %>%
    rename(compare_var = all_of(compare_var), year = all_of(time_var))

  # The right-hand side of every join: same rows, without the group column.
  shift_source <- select(data_subset, -all_of("compare_var"))

  boot_data <- data_subset
  # Leads: moving the source year back by k lines year t up with year t + k.
  # seq_len() (not 1:n) so that lead_yrs = 0 performs no join at all.
  for (k in seq_len(lead_yrs)) {
    boot_data <- left_join(
      boot_data,
      mutate(shift_source, year = year - k),
      by = c("year", unit_var),
      suffix = c("", paste0("_t", k))
    )
  }
  # Lags: moving the source year forward by k lines year t up with year t - k.
  for (k in seq_len(lag_yrs)) {
    boot_data <- left_join(
      boot_data,
      mutate(shift_source, year = year + k),
      by = c("year", unit_var),
      suffix = c("", paste0("_p", k))
    )
  }

  # Row sums over exactly the generated lead/lag columns. Selecting by exact
  # name (rather than by prefix) keeps unrelated columns that merely share the
  # prefix, e.g. "tr_total" when treat_var is "tr", out of the sums. Any
  # missing lead/lag value makes the sum NA.
  lag_cols <- shifted_names(lag_control, "_p", lag_yrs)
  treat_lead_cols <- shifted_names(treat_var, "_t", lead_yrs)
  control_lead_cols <- shifted_names(untreat_var, "_t", lead_yrs)

  boot_data$treat_prior_sum <- rowSums(select(boot_data, all_of(lag_cols)))
  boot_data$treat_sum <-
    rowSums(select(boot_data, all_of(treat_lead_cols))) + boot_data[[treat_var]]
  boot_data$control_sum <-
    rowSums(select(boot_data, all_of(control_lead_cols))) +
    boot_data[[untreat_var]]

  # Both groups need lag_control to hold in every lag year.
  qualifies_prior <- boot_data$treat_prior_sum == lag_yrs
  # Treated: additionally treat_var holds now and in every lead year.
  boot_data$treated <- ifelse(
    qualifies_prior & boot_data$treat_sum == lead_yrs + 1, 1, 0
  )
  # Control: additionally untreat_var holds now and in every lead year.
  boot_data$control <- ifelse(
    qualifies_prior & boot_data$control_sum == lead_yrs + 1, 1, 0
  )

  ## Weights, computed within each compare_var group:
  # treated observations get 1, provided the group has at least one control;
  # control observations get -(# treated) / (# control), provided the group
  # has at least one treated observation; everything else gets 0.
  # NOTE(review): the control branch also requires treat_sum to be non-missing
  # although control status is derived from control_sum; kept as it was --
  # confirm this is intended.
  boot_data <- boot_data %>%
    group_by(compare_var) %>%
    mutate(
      num_treated = sum(treated, na.rm = TRUE),
      num_control = sum(control, na.rm = TRUE)
    ) %>%
    ungroup() %>%
    mutate(obs_weight = case_when(
      treated == 1 & num_control > 0 & !is.na(num_control) &
        !is.na(treat_prior_sum) & !is.na(treat_sum) ~ 1,
      control == 1 & num_treated > 0 & !is.na(num_treated) &
        !is.na(treat_prior_sum) & !is.na(treat_sum) ~ -num_treated / num_control,
      TRUE ~ 0
    ))
  boot_data
}

# # To test the boot data function
# boot_data <- data.frame(unt_p1 = c(1,1,1,0,0),
#                         unt_p2 = c(1,1,1,0,1),
#                         tr = c(1,1,0,1,1),
#                         tr_t1 = c(1,1,0,1,1),
#                         tr_t2 = c(1,0,0,1,1))
# treat_var <- "tr"
# untreat_var <- "unt"
# lag_yrs <- 2
# lead_yrs <- 2

# Earlier variant of the lead/lag dataset builder: control status is derived
# from treat_var (never treated in the current and lead years) rather than from
# a separate sum of untreat_var, and the lag requirement uses untreat_var.
#
# Arguments (column names are passed as strings):
#   treat_var    column summed over the current year and all lead years;
#                "treated" when the sum equals lead_yrs + 1, "control" when it
#                equals 0 (presumably a 0/1 indicator).
#   untreat_var  column whose lag values must sum to lag_yrs for an
#                observation to qualify as treated or control.
#   time_var     time column, renamed to `year`; must support arithmetic.
#   compare_var  column defining the groups within which treated and control
#                observations are counted, renamed to `compare_var`.
#   unit_var     unit identifier column(s); together with `year` the join key.
#   d_var        additional column(s) to carry along.
#   lead_yrs     number of lead years (non-negative whole number; 0 adds none).
#   lag_yrs      number of lag years (non-negative whole number; 0 adds none).
#   df           input data frame; presumably one row per unit and year.
#
# Returns the selected columns plus `<var>_t<k>` (lead k) and `<var>_p<k>`
# (lag k) copies, and the columns treat_prior_sum, treat_sum, treated,
# control, num_treated, num_control and obs_weight.
make_boot_data_old <- function(treat_var, untreat_var, time_var, compare_var,
                               unit_var, d_var, lead_yrs, lag_yrs, df) {
  # Exact names of the shifted copies of `vars` ("x_p1", "x_p2", ...).
  # Going through seq_len() means zero periods yield zero names; paste0() on
  # an empty index would return the bare prefix instead.
  shifted_names <- function(vars, tag, n_periods) {
    as.character(unlist(lapply(
      seq_len(n_periods),
      function(k) paste0(vars, tag, k)
    )))
  }

  # all_of() makes the string-to-column lookup explicit; passing bare external
  # character vectors to rename() is deprecated in tidyselect.
  data_subset <- df %>%
    select(all_of(c(treat_var, untreat_var, d_var, compare_var, unit_var,
                    time_var))) %>%
    rename(compare_var = all_of(compare_var), year = all_of(time_var))

  # The right-hand side of every join: same rows, without the group column.
  shift_source <- select(data_subset, -all_of("compare_var"))

  boot_data <- data_subset
  # Leads: moving the source year back by k lines year t up with year t + k.
  # seq_len() (not 1:n) so that lead_yrs = 0 performs no join at all.
  for (k in seq_len(lead_yrs)) {
    boot_data <- left_join(
      boot_data,
      mutate(shift_source, year = year - k),
      by = c("year", unit_var),
      suffix = c("", paste0("_t", k))
    )
  }
  # Lags: moving the source year forward by k lines year t up with year t - k.
  for (k in seq_len(lag_yrs)) {
    boot_data <- left_join(
      boot_data,
      mutate(shift_source, year = year + k),
      by = c("year", unit_var),
      suffix = c("", paste0("_p", k))
    )
  }

  # Row sums over exactly the generated lead/lag columns. Selecting by exact
  # name (rather than by prefix) keeps unrelated columns that merely share the
  # prefix out of the sums. Any missing lead/lag value makes the sum NA.
  lag_cols <- shifted_names(untreat_var, "_p", lag_yrs)
  lead_cols <- shifted_names(treat_var, "_t", lead_yrs)

  boot_data$treat_prior_sum <- rowSums(select(boot_data, all_of(lag_cols)))
  boot_data$treat_sum <-
    rowSums(select(boot_data, all_of(lead_cols))) + boot_data[[treat_var]]

  # Both groups need untreat_var to hold in every lag year.
  qualifies_prior <- boot_data$treat_prior_sum == lag_yrs
  # Treated: treat_var holds now and in every lead year.
  boot_data$treated <- ifelse(
    qualifies_prior & boot_data$treat_sum == lead_yrs + 1, 1, 0
  )
  # Control: treat_var never holds now or in any lead year.
  boot_data$control <- ifelse(qualifies_prior & boot_data$treat_sum == 0, 1, 0)

  ## Weights, computed within each compare_var group:
  # treated observations get 1, provided the group has at least one control;
  # control observations get -(# treated) / (# control), provided the group
  # has at least one treated observation; everything else gets 0.
  boot_data <- boot_data %>%
    group_by(compare_var) %>%
    mutate(
      num_treated = sum(treated, na.rm = TRUE),
      num_control = sum(control, na.rm = TRUE)
    ) %>%
    ungroup() %>%
    mutate(obs_weight = case_when(
      treated == 1 & num_control > 0 & !is.na(num_control) &
        !is.na(treat_prior_sum) & !is.na(treat_sum) ~ 1,
      control == 1 & num_treated > 0 & !is.na(num_treated) &
        !is.na(treat_prior_sum) & !is.na(treat_sum) ~ -num_treated / num_control,
      TRUE ~ 0
    ))
  boot_data
}
